
		**********************************************************
		** File: FM_data_merging   								**
		** Paper: Correlates of Voter Turnout 					**
		** Author: Richard Frank
		** Date: 17 Feb 2020 									**
		** Task: merging various datasets used in this paper 	**
		**********************************************************

		* Session set-up: pin the Stata version and the random-number seed,
		* switch off output paging, then empty data, matrices and Mata.
		version 16.0
		set more off
		set seed 1234
		clear
		clear matrix
		clear mata

		* change to your working directory
		cd "/~~~/"




						************************
						*** 2.1.  WORLD BANK ***  
						************************
		
		* The World Bank section starts from an empty dataset.
		clear
		* change to your working directory
		cd "/~~~/"

		************* 1
		** 6. GDP **
		
			** VARIOUS OPERATIONALISATIONS **
			** Na.log GDP per capita (4); 
			** real gdp per capita log (2); 
			** log of GDP at purchaser's prices (2);
			** Real GDP per cap (chain series) in intl dollars, 2000 const prices (1); 
			** Per capita GDP measured in constant 1990 dollars (1); 
			** per capita GDP, measured at PPP using the 1995 international prices (1); 
			** real GDP per capita in constant US dollars using the Chain index base on PPP (1);  
			** lagged one year and based on constant 2000 US dollars (1); 
			** measured at current US dollars (1); 
			** GDP per capita in year of the election (1);   
			** gdp in current prices (1); NA (2)
		
			 * Download GDP (indicator ny.gdp.mktp.kd) in long country-year form and
			 * save one row per ccode-year in WDI_gdp.dta.
			 * cow.do is not visible here; it is assumed to add ccode from Country,
			 * with ccode==0 marking unmatched entities -- TODO confirm.
			 wbopendata, language(en - English) indicator(ny.gdp.mktp.kd) long clear
			 rename countryname Country
			 run "cow.do"
			 drop if ccode==0
			 collapse (sum) ny_gdp_mktp_kd, by(ccode year)
			 * NOTE(review): the label says constant 2000 US$; confirm the base year
			 * of the series actually downloaded.
			 label var ny_gdp_mktp_kd "6 GDP (constant 2000 US$) (WB)"
			 rename ny_gdp_mktp_kd gdpconstant2000
			 * collapse (sum) returns 0 when every input in the cell is missing.
			 * Recode those zeros to missing, as every other WDI block in this file does.
			 replace gdpconstant2000 = . if gdpconstant2000==0
			 save WDI_gdp.dta, replace
		
			 * GDP per capita, PPP (indicator ny.gdp.pcap.pp.kd): one row per
			 * ccode-year, saved as WDI_gdp_ppp_pc.dta.
			 wbopendata, language(en - English) indicator(ny.gdp.pcap.pp.kd) long clear
			 rename countryname Country
			 run "cow.do"
			 drop if ccode==0
			 rename Country country
			 * Aggregate straight into the final variable name.
			 collapse (first) country (sum) gdp_pc_ppp = ny_gdp_pcap_pp_kd, by(ccode year)
			 * NOTE(review): "$2011" inside the label may be read as a global macro
			 * reference -- check the stored label text.
			 label variable gdp_pc_ppp "6 GDP per capita, ppp (const.intl.$2011)(WB)"
			 * Zeros are the sums of all-missing cells: turn them back into missing.
			 mvdecode gdp_pc_ppp, mv(0)
			 save WDI_gdp_ppp_pc.dta, replace
	 
		************************ 2
		** 30. GNP per capita **
		
			** log of GNP per capita in constant US dollars measured as 
				** GNP per capita in current US dollars divided by corresponding 
				** US GDP deflator (2); 
			** proportion in real change in real per capita national income (1); 
	
			* GNI per capita (indicator ny.gnp.pcap.kd) and its natural log:
			* one row per ccode-year, saved as WDI_gni_pc_cons.dta.
			wbopendata, language(en - English) indicator(ny.gnp.pcap.kd) long clear
			rename countryname Country
			run "cow.do"
			drop if ccode==0
			rename Country country
			collapse (first) country (sum) gni_pc_constant = ny_gnp_pcap_kd, by(ccode year)
			label variable gni_pc_constant "30 GNI per capita, (const. US2010) (WB)"
			* A zero here is the sum of an all-missing cell, not a real value.
			mvdecode gni_pc_constant, mv(0)
			generate gni_ln = log(gni_pc_constant)
			label variable gni_ln "30 GNI per capita, ln (const 2010 USD) (WB)"
			save WDI_gni_pc_cons.dta, replace
	
		******************** 3
		** 60. GNP growth **
		
			** Annual percentage increase or decrease in GNP per capita in the 
			** election year compared to the previous year 
			** (based on gross national product indicator expressed in local currency)
		
			* GNI per capita growth, annual % (indicator ny.gnp.pcap.kd.zg):
			* one row per ccode-year, saved as WDI_gni_pc_growth.dta.
			wbopendata, language(en - English) indicator(ny.gnp.pcap.kd.zg) long clear
			rename countryname Country
			run "cow.do"
			rename Country country
			drop if ccode==0
			* collapse (sum) returns 0 when all inputs are missing. A growth rate can
			* legitimately be 0, so identify empty cells by counting the non-missing
			* inputs rather than by testing the sum for 0.
			collapse (first) country (sum) ny_gnp_pcap_kd_zg (count) n_obs = ny_gnp_pcap_kd_zg, by(ccode year)
			label var ny_gnp_pcap_kd_zg "60 GNI pc growth (annual %) (WB)"
			rename ny_gnp_pcap_kd_zg gni_pc_growth
			replace gni_pc_growth = . if n_obs==0
			drop n_obs
			save WDI_gni_pc_growth.dta, replace
		
		******************** 4
		** 13. GDP growth **
		
			**  GDP per capita growth for the election year 
			** (2); percentage change in GDP in current international dollars (in thousands), 
			** PPP at the time of the survey (1); 
			** percentage change over previous year's value for per capita GDP (1); 
			** Annual percentage change in real GDP (1); 
			** percentage change in real GDP (1); 
			** the election year proportional change in real per capita national income (1); 
			** change in gdp per capita (1); economic growth rate (1) 
		
			* GDP per capita growth, annual % (indicator ny.gdp.pcap.kd.zg):
			* one row per ccode-year, saved as WDI_gdp_pc_growth.dta.
			wbopendata, language(en - English) indicator(ny.gdp.pcap.kd.zg) long clear
			rename countryname Country
			run "cow.do"
			rename Country country
			drop if ccode==0
			* collapse (sum) returns 0 when all inputs are missing. A growth rate can
			* legitimately be 0, so identify empty cells by counting the non-missing
			* inputs rather than by testing the sum for 0.
			collapse (first) country (sum) ny_gdp_pcap_kd_zg (count) n_obs = ny_gdp_pcap_kd_zg, by(ccode year)
			label var ny_gdp_pcap_kd_zg "13 GDP pc growth (annual %)"
			rename ny_gdp_pcap_kd_zg gdp_pc_growth
			replace gdp_pc_growth = . if n_obs==0
			drop n_obs
			save WDI_gdp_pc_growth.dta, replace
		
		******************* 5
		** 8. Population **
		
			** 11 log; 3 total population; 
			** 1 dummy (small state any country with less than 2 millions); 1 NA
		
			 * Total population (indicator sp.pop.totl): one row per ccode-year,
			 * saved as WDI_pop_total.dta.
			 wbopendata, language(en - English) indicator(sp.pop.totl) long clear
			 rename countryname Country
			 run "cow.do"
			 drop if ccode==0
			 * The former cyr helper and its sort were discarded by collapse, so they
			 * are no longer created.
			 collapse (sum) sp_pop_totl, by(ccode year)
			 label var sp_pop_totl "8 Population, total"
			 rename sp_pop_totl population
			 * Zeros are sums of all-missing cells. The full variable name is used so
			 * the line also works with -set varabbrev off-.
			 replace population=. if population==0
			 save WDI_pop_total.dta, replace
			
		**************************** 6
		** 29. Population density **
		
			 * Population density (indicator en.pop.dnst) and its natural log:
			 * one row per ccode-year, saved as WDI_pop_density.dta.
			 wbopendata, language(en - English) indicator(en.pop.dnst) long clear
			 rename countryname Country
			 run "cow.do"
			 drop if ccode==0
			 * The former cyr helper and its sort were discarded by collapse, so they
			 * are no longer created.
			 collapse (sum) en_pop_dnst, by(ccode year)
			 label var en_pop_dnst "29 Population density,people per sq.km land area (WB)"
			 rename en_pop_dnst pop_density
			 * Zeros are sums of all-missing cells, not real densities.
			 replace pop_density=. if pop_density==0
			 gen pop_density_ln = ln(pop_density)
			 label var pop_density_ln "29 Pop. density,people per sq.km land area, ln (WB)"
			 
			 save WDI_pop_density.dta, replace	
		
		********************************* 7
		** 32. Average life expectancy **
	
			** Average life expectancy, number of years (2); measured at birth (1)
		
			 * Life expectancy at birth (indicator sp.dyn.le00.in): one row per
			 * ccode-year, saved as WDI_life_expectancy.dta.
			 wbopendata, language(en - English) indicator(sp.dyn.le00.in) long clear
			 rename countryname Country
			 run "cow.do"
			 drop if ccode==0
			 collapse (sum) life_exp = sp_dyn_le00_in, by(ccode year)
			 label variable life_exp "32 Life expectancy at birth, tot yrs (WB)"
			 * A zero is the sum of an all-missing cell.
			 mvdecode life_exp, mv(0)
			 save WDI_life_expectancy.dta, replace	
		
		********************** 8
		** 16. Urbanization **
			
			** percentage of the total population living in urban areas (6); 
			** the people living in urban areas as defined by national statistical offices (1); NA (1)
		
			 * Urban population, % of total (indicator sp.urb.totl.in.zs): one row
			 * per ccode-year, saved as WDI_urban_pop.dta.
			 wbopendata, language(en - English) indicator(sp.urb.totl.in.zs) long clear
			 rename countryname Country
			 run "cow.do"
			 drop if ccode==0
			 collapse (sum) urban_pop_pct = sp_urb_totl_in_zs, by(ccode year)
			 label variable urban_pop_pct "16 Urban population, % total (WB)"
			 * A zero is the sum of an all-missing cell.
			 mvdecode urban_pop_pct, mv(0)
			 save WDI_urban_pop.dta, replace	
		
		******************* 9
		** 51. Inflation **
		
			** log inflation (1); inflation in t-1 **

			 * Consumer price inflation (indicator fp.cpi.totl.zg): one row per
			 * ccode-year, saved as WDI_inflation.dta.
			 wbopendata, language(en - English) indicator(fp.cpi.totl.zg) long clear
			 rename countryname Country
			 run "cow.do"
			 drop if ccode==0
			 * collapse (sum) returns 0 when all inputs are missing. Inflation can
			 * legitimately be 0, so identify empty cells by counting the non-missing
			 * inputs rather than by testing the sum for 0.
			 collapse (sum) fp_cpi_totl_zg (count) n_obs = fp_cpi_totl_zg, by(ccode year)
			 label var fp_cpi_totl_zg "51 Inflation, consumer prices (WB)"
			 rename fp_cpi_totl_zg inflation
			 replace inflation=. if n_obs==0
			 drop n_obs
			 save WDI_inflation.dta, replace	
			
			
		********************** 10
		** 52. Unemployment **
		
			** % of economically active population in % in t-1 
		
			 * Unemployment, % of labour force (indicator sl.uem.totl.ne.zs): one row
			 * per ccode-year, saved as WDI_unemployment.dta.
			 wbopendata, language(en - English) indicator(sl.uem.totl.ne.zs) long clear
			 rename countryname Country
			 run "cow.do"
			 drop if ccode==0
			 collapse (sum) unemployment = sl_uem_totl_ne_zs, by(ccode year)
			 label variable unemployment "52 Unemployment, total, %labor force (WB)"
			 * A zero is treated as the sum of an all-missing cell.
			 recode unemployment (0 = .)
			 save WDI_unemployment.dta, replace	
		
		*************************** 11
		** 20. Inequality (Gini) **	

			 * Gini index (indicator si.pov.gini): one row per ccode-year, saved as
			 * WDI_gini.dta.
			 wbopendata, language(en - English) indicator(si.pov.gini) long clear
			 rename countryname Country
			 run "cow.do"
			 drop if ccode==0
			 collapse (sum) gini = si_pov_gini, by(ccode year)
			 label variable gini "20 Inequality, gini index (WB)"
			 * A zero is the sum of an all-missing cell.
			 recode gini (0 = .)
			 save WDI_gini.dta, replace	
		
		****************** 12
		** 19. Literacy **
		
			** the proportion of adult illiterate population (2); 
			** % of the population aged 15 years and older that can read & write (2);
			** tot enrolment in pri. & sec. education as a % relevant age group in the population (1): 
			** secondary school enrolment measured as a % of total population (1); 
			** % of population (usually defined as the population over age 15) that is literate (1);
		
			 * Adult literacy rate (indicator se.adt.litr.zs): one row per
			 * ccode-year, saved as WDI_literacy.dta.
			 wbopendata, language(en - English) indicator(se.adt.litr.zs) long clear
			 rename countryname Country
			 run "cow.do"
			 drop if ccode==0
			 collapse (sum) literacy = se_adt_litr_zs, by(ccode year)
			 label variable literacy "19 Literacy, adult tot. % of people >15 (WB)"
			 * A zero is the sum of an all-missing cell.
			 mvdecode literacy, mv(0)
			 save WDI_literacy.dta, replace	
		
		********************************************
		** 74, 75, 76 measures of decentralisation
		** These come from both Marks et al 2008 and the WB
		** the first source is cross-sectional and no country names
		** So, I use the WB data
		** the article doesn't make it clear what WB data they use
		** So I use the overall first & second spreadsheet
		** source: http://www1.worldbank.org/publicsector/decentralization/fiscalindicators.htm
		** two sources for decentralization indicators. The first comes from Marks et al (2008; see also Hooghe et al, 2008) and provides scores for regional authority for fortytwo countries during the period 1950 ^ 2006.(8) The second comes from the World Bank and provides indicators of fiscal decentralization for over 100 countries during the period 1972 ^ 2000; this dataset has, however, many missing observations.
 		
			** 74 Sub-national Expenditures (% of total expenditures) **
			
			* Read the ExpShare sheet (wide: one y* column per year), remove blank
			* rows and the two footnote rows, and reshape to country-year.
			import excel "WB decentralisation Indicators.xls", sheet("ExpShare") cellrange(A3:AD114) firstrow clear
			drop if A=="" | ///
				A=="The following countries use Budgetary rather than consolidated accounts:" | ///
				A=="China, Fiji, Papau New Guinea, Philippines, Kygyz Republic, Ecuador, El Salvador, Jordan, Bangladesh, Botswana, Kenya, Malawi, Swaziland, Uganda, New Zealand"
			* Name the reshape index directly instead of renaming _j afterwards.
			reshape long y , i(A) j(year)
			rename A Country
			run "cow.do"
			sort ccode year
			drop if ccode==0
			rename y subnat_exp
			label variable subnat_exp "74 Sub-national Expenditures (% of total expenditures) (WB)"			
			save WB_subnat_exp.dta, replace
			
			** 75 Sub-national Revenues (% of total revenues)
			
			* Read the RevShare sheet (wide: one y* column per year), remove blank
			* rows and the footnote row, and reshape to country-year.
			* The workbook is now read from the working directory, like the ExpShare
			* import of the same file; it used to be read from "~/".
			import excel "WB decentralisation Indicators.xls", sheet("RevShare") cellrange(A3:AD112) firstrow clear

			
			drop if A==""
			drop if A=="China, Fiji, Papau New Guinea, Philippines, Kygyz Republic, Ecuador, El Salvador, Guatemala, Jordan, Bangladesh, Botswana, Kenya, Malawi, Somalia, Sudan, Swaziland, Uganda, New Zealand" 
			* No j() is given, so reshape stores the year suffix in _j.
			reshape long y , i(A)
			rename _j year
			rename A Country
			 run "cow.do"
			sort ccode year
			drop if ccode==0
			rename y subnat_rev
			label var subnat_rev "75 Sub-national Revenues (% of total revenues) (WB)"
			save WB_subnat_rev.dta, replace
 	
		** MERGING WB files into one **
 
			* Combine the World Bank country-year files into temp_WB.dta.
			* WDI_gdp_ppp_pc.dta is the master; every other file is merged 1:1 on
			* ccode and year, in the order listed below.
			* NOTE(review): WDI_gdp.dta is saved earlier in this file but is not in
			* this list -- confirm that is intended.
			clear
			use WDI_gdp_ppp_pc.dta 

			local wb_files ///
				WDI_gni_pc_cons WDI_gdp_pc_growth WDI_gni_pc_growth ///
				WDI_pop_total WDI_pop_density WDI_life_expectancy ///
				WDI_urban_pop WDI_inflation WDI_unemployment ///
				WDI_gini WDI_literacy WB_subnat_exp WB_subnat_rev

			foreach f of local wb_files {
				merge 1:1 ccode year using `f'.dta
				* Show the match result, then remove _merge so the next merge can run.
				tab _merge
				drop _merge
			}

			drop Country
			
			save "temp_WB.dta", replace
 	
	
							*****************
							** 2.3. POLITY **  
							*****************
	
		* Load the Polity file and keep only the identifiers and polity2.
		use "Polity 2018.dta", clear
		keep ccode year polity2

		** 10. Democracy

			** Operationalisations found in the literature:
			** Polity IV scores in 1999 (1);
			** Polity IV scores (1);
			** a 'fully democratic' is a score of "1" on the 7-point FH scale of
			** political rights, while a 'partially democratic' country is any country
			** with a score of '2' on this scale; Freedom House uses separate
			** political rights and civil liberties indexes, both of which are
			** measured from 1 (high freedom) to 7 (low freedom). We first summed
			** these into a composite index representing the level of democracy.
			** In Latin American countries over the course of this study, the
			** composite scale ranges from 2 to 12. We then reverse the index
			** and subtract 2 from it, resulting in an 11-point variable ranging
			** from 0 to 10 and measured so that high scores represent higher
			** levels of democracy (1); Gastil Index (1); Freedom House employs
			** separate political rights and civil liberties indexes, both of which
			** are measured from 1 (high freedom) to 7 (low freedom). We summed these into
			** a composite index representing the level of democracy and recoded them (...)

			label variable polity2 "10 Polity2 (Polity IV)"
		** 105. Prior democratic experience 
		
			** Experience with democratic governance in the fifty years preceding 
			** the first election in our data set. 
			** This is measured as the sum of the annualized observations for 
			** the Revised Combined Polity Score (variable name POLITY2)

		* first = 1 on the country-year of the first election in the dataset,
		* 0 on every other row. Each entry below is "ccode year".
		sort ccode year
		gen first=0
		label var first "first election in dataset"
		foreach pair in ///
			"2 1946" "20 1945" "31 1977" "40 1946" "41 1946" "42 1947" ///
			"51 1962" "52 1966" "53 1966" "54 1980" "55 1976" "56 1979" ///
			"57 1979" "58 1984" "60 1984" "70 1946" "80 1984" "90 1947" ///
			"91 1948" "92 1952" "93 1947" "94 1946" "95 1948" "100 1945" ///
			"101 1947" "110 1968" "115 1977" "130 1950" "135 1945" "140 1947" ///
			"145 1951" "150 1948" "155 1945" "160 1946" "165 1946" "200 1945" ///
			"205 1948" "210 1946" "211 1946" "212 1954" "220 1946" "223 1993" ///
			"225 1947" "230 1977" "232 1993" "235 1945" "255 1990" "260 1949" ///
			"265 1950" "290 1947" "305 1945" "310 1945" "315 1946" "316 1996" ///
			"317 1994" "325 1948" "331 1993" "338 1966" "339 1945" "341 2009" ///
			"343 1994" "344 1992" "345 1950" "346 1996" "347 2010" "349 1992" ///
			"350 1946" "352 1970" "355 1945" "359 1994" "360 1952" "365 1946" ///
			"366 1990" "367 1993" "368 1992" "369 1994" "370 1995" "371 1995" ///
			"372 1992" "373 1995" "375 1945" "380 1948" "385 1945" "390 1945" ///
			"395 1946" "402 1991" "403 1991" "404 1994" "411 1983" "420 1966" ///
			"432 1964" "433 1963" "434 1960" "435 1965" "436 1965" "437 1960" ///
			"438 1963" "439 1965" "450 1951" "451 1962" "452 1969" "461 1961" ///
			"471 1960" "475 1964" "481 1961" "482 1964" "483 1962" "484 1963" ///
			"490 1965" "500 1980" "501 1966" "510 1965" "516 1965" "517 1965" ///
			"520 1964" "522 1982" "530 1957" "540 1992" "541 1977" "551 1968" ///
			"552 1970" "553 1978" "560 1948" "565 1994" "570 1970" "571 1969" ///
			"572 1972" "580 1960" "581 1978" "590 1976" "600 1963" "615 1964" ///
			"616 1959" "620 1952" "625 1958" "630 1960" "640 1946" "645 1947" ///
			"651 1945" "652 1947" "660 1947" "663 1947" "666 1949" "678 1971" ///
			"680 1978" "690 1963" "692 1973" "698 2003" "700 1949" "701 1994" ///
			"702 1995" "703 1995" "704 1994" "705 1994" "712 1951" "713 1983" ///
			"731 1957" "732 1950" "740 1946" "750 1951" "760 2007" "770 1970" ///
			"771 1973" "775 1951" "780 1952" "781 2009" "790 1959" "800 1946" ///
			"811 1955" "812 1955" "816 1960" "817 1959" "820 1959" "830 1968" ///
			"840 1946" "850 1955" "860 2007" "900 1946" "910 1977" "920 1946" ///
			"935 1983" "940 1980" "946 1982" "947 1981" "950 1972" "970 1971" ///
			"983 1991" "986 1996" "987 1991" {
			* Split the pair into its country code and election year.
			local cc : word 1 of `pair'
			local yr : word 2 of `pair'
			replace first=1 if ccode==`cc' & year==`yr'
		}
 		 
		* Prior democratic experience: the sum of polity2 over the 50 years
		* before each country's first election in the dataset (rows flagged
		* first==1).
		*
		* The total is now written to every row of the country. It used to be
		* stored only on the pre-election window rows, which left dem_exp missing
		* on the first-election year and all later years. Values on the window
		* rows are unchanged. The unused helper c1 is no longer created.

		* fy = first-election year, copied to all rows of the country
		* (missing when the country has no flagged row).
		gen first_y=year if first==1
		bysort ccode (year): egen fy=max(first_y)
		gen next=fy-50

		* in_window marks the years fy-50 ... fy-1. When fy is missing,
		* year>=next is false, so no row of that country is marked.
		gen byte in_window = year<fy & year>=next

		* egen total() ignores missing polity2 values; rows outside the window
		* contribute nothing because cond() returns missing for them.
		by ccode: egen dem_exp=total(cond(in_window, polity2, .))
		by ccode: egen n_window=total(in_window)
		* No observed pre-election year: leave the measure missing, as before.
		replace dem_exp=. if n_window==0
		
		label var dem_exp "105 Prior democratic experience (Polity)"
		drop first first_y fy next in_window n_window
		 
		
		duplicates report ccode year
		save "temp_PO.dta", replace
		
						************************
						** 2.4. FREEDOM HOUSE ** 
						************************
	
 	
			* Build two Freedom House indicators from the political-rights score PR
			* and save one row per ccode-year in temp_FH.
			use "FH 2019.dta" , clear	
		
			 
		** 10. Quality of democracy 
		
			** Operationalisations found in the literature:
			** dummy when the election was held in a country that obtained a 
			** score of 1 on political rights. (1); 
			** a 'fully democratic' is a score of "1" on the 7-point FH scale of 
			** political rights, while a 'partially democratic' country is any country 
			** with a score of '2' on this scale; Freedom House uses separate political 
			** rights and civil liberties indexes, both of which are measured from 1 
			** (high freedom) to 7 (low freedom). We first summed these into a composite 
			** index representing the level of democracy. In Latin American countries 
			** over the course of this study, the composite scale ranges from 2 to 12.
			** We then reverse the index and subtract 2 from it, resulting in an 11-point
			** variable ranging from 0 to 10 and measured so that high scores represent higher 
			** levels of democracy (1); Gastil Index (1); Freedom House employs separate 
			** political rights and civil liberties indexes, both of which are measured 
			** from 1 (high freedom) to 7 (low freedom). We summed these into a composite
			** index representing the level of democracy and recoded them so that higher 
			** scores reflect more democratic environments. The composite scale ranges from 2 
			** (least democratic) to 14 (most democratic).(1); We use the total FH scores
			** from the year before the election. We use combined polity scores from the 
			** year prior to the legislative election to cross-check the results. (1);
			** The countries included reached at least a '6' on the Polity2 (1);
			** we distinguish between three types of regimes: democracies, anocracies
			** and autocracies. To capture these three regime types, we create an
			** ordinal variable and code democracies as one, anocracies as two, and
			** autocracies as three (1);
			** The average score of Freedom House indicators of political rights and
			** civil liberties. The pre-1973 sample includes only West European
			** countries, coded 1 for the entire period (1)

			* 1 when PR==1, 0 for any other observed score.
			gen qual_democ=0
			replace qual_democ=1 if PR==1
			* A missing PR score used to be coded 0. It is now left missing, the
			* same way -good- below treats it.
			replace qual_democ=. if PR==.
			label var qual_democ "10 Qual. of democracy, 0/1 if PR=1 (FH)"
			
		** 39. New Democracy
			
			** A dummy variable which equals 1 when the election was held in the 
			** country which did not obtained continuously a score of 1 or 2 on 
			** political rights from 1972 until 2005 (2)
			
			* good = 1 when PR is 1 or 2, 0 for other observed scores, missing otherwise.
			gen good=0
			replace good=1 if PR==1 | PR==2
			replace good=. if PR==.
			
			* The unused helpers -missing- and -sum_missing- are no longer created.
			gen nonmissing=0
			replace nonmissing=1 if PR~=.

			* Running counts within country, in year order: years with a PR score,
			* and years with a score of 1 or 2.
			sort ccode year 
			by ccode: gen sum_nonmissing=sum(nonmissing)			
			by ccode: gen sum_good=sum(good)
			replace sum_good=. if year>2005
			
			* all_good = 1 on the 2005 row when every scored year up to 2005 was 1 or 2.
			gen all_good=.
			replace all_good=1 if sum_good==sum_nonmissing & year==2005
			
			* Copy the 2005 verdict to the country's rows after 2005:
			* 1 = continuously good, 0 = not. Rows before 2006 stay missing.
			by ccode: egen max=max(all_good)
			replace max=. if year<2006
			replace max=0 if max~=1 & year>2005
			
			  
			* new_democ is defined for years after 2005 only.
			gen new_democ=0
			replace new_democ=1 if max==0 
			replace new_democ=. if year<2006
			label var new_democ "39 New democracy, >2 PR any time before 2006 (FH)"
		
		* 30 duplicates *
		duplicates report ccode year 
		* Resolve duplicate ccode-years by taking the maximum of each indicator.
		collapse (max) qual_democ new_democ, by(ccode year)
		* collapse discards variable labels, so they are set again.
		label var qual_democ "10 Qual. of democracy, 0/1 if PR=1 (FH)"	
		label var new_democ "39 New democracy, >2 PR any time before 2006 (FH)"
		
		save "temp_FH", replace
		
								**********
								** UNDP	** DONE
								**********
	
		** 45. Human development index **
		
			* Load the HDI file, label the index, report duplicate ccode-years
			* and save it sorted by ccode and year.
			use "HDI 2018.dta", clear

		label variable hdi_index "45 HDI Index (UNDP)"

		duplicates report ccode year
		sort ccode year

 		save "temp_UN.dta", replace
 
	****************************************************************************
	
 

						******************** 
						** OECD countries ** DONE
						********************
	
			* General government expenditure (% of GDP) from the OECD csv, mapped to
			* ccode and saved as temp_OE.dta.
			* NOTE(review): this is a user-specific absolute path; every other input
			* in this file is read relative to the working directory.
			import delimited "/Users/rich/Dropbox/Data/OECD/data/OECD gov expenditures.csv", clear 
				rename time year
				* The first column arrives as "ïlocation" when the file's byte-order
				* mark is read as text. Accept a clean "location" name as well.
				capture confirm variable location
				if _rc {
					rename ïlocation abb
				}
				else {
					rename location abb
				}
			sort abb year
			order abb year
	

		** 33. public expenditures
			
			** Percentages, total disbursements of general government to GDP, 
			** OECD values start from 1970, values for 1965 are from CESIFO 
			** (original source is OECD), values between 1966 and 1969 are linear 
			** interpolations (2); average relative public expenditure (1)
			** also check to see if World Bank has it.
	
			keep if measure=="PC_GDP"
			keep if subject=="TOT"
			rename value ggexp 
			label var ggexp "33 Gen. gov. exp (%GDP) (OECD)"
			
			keep abb year ggexp

			* Country names for the location codes. Codes not listed here (for
			* example aggregates) keep an empty name.
			gen country=""
			replace country="Australia" if abb=="AUS"
			replace country="Austria" if abb=="AUT"
			replace country="Belgium" if abb=="BEL"
			replace country="Canada" if abb=="CAN"
			replace country="Switzerland" if abb=="CHE"
			replace country="Germany" if abb=="DEU"
			replace country="Denmark" if abb=="DNK"
			replace country="Spain" if abb=="ESP"
			replace country="Finland" if abb=="FIN"
			replace country="France" if abb=="FRA"
			replace country="Great Britain" if abb=="GBR"
			replace country="Greece" if abb=="GRC"
			* "IRE" is kept from the original list; "IRL" is mapped below.
			replace country="Ireland" if abb=="IRE"
			replace country="Iceland" if abb=="ISL"
			replace country="Italy" if abb=="ITA"
			replace country="Japan" if abb=="JPN"
			replace country="South Korea" if abb=="KOR"
			* LUX was assigned twice; only the later spelling, "Luxembourg", ever
			* took effect, so that is the one kept.
			replace country="Luxembourg" if abb=="LUX"
			replace country="Mexico" if abb=="MEX"
			replace country="Netherlands" if abb=="NLD"
			replace country="Norway" if abb=="NOR"
			replace country="New Zealand" if abb=="NZL"
			replace country="Portugal" if abb=="PRT"
			replace country="Poland" if abb=="POL"
			replace country="Sweden" if abb=="SWE"
			replace country="Turkey" if abb=="TUR"
			replace country="United States" if abb=="USA"
			* Trailing space removed from "Czech Republic " so the name can be
			* matched exactly.
			replace country="Czech Republic" if abb=="CZE"
			replace country="Estonia" if abb=="EST"
			replace country="Hungary" if abb=="HUN"
			replace country="Ireland" if abb=="IRL"
			replace country="Israel" if abb=="ISR"
			replace country="Latvia" if abb=="LVA"
			replace country="Slovak Republic" if abb=="SVK"
			replace country="Slovenia" if abb=="SVN"
			replace country="Chile" if abb=="CHL"
			replace country="Colombia" if abb=="COL"
			replace country="Costa Rica" if abb=="CRI"
			replace country="Lithuania" if abb=="LTU"
		
			rename country Country
			run "cow.do"

			* Drop rows without a country code, as the other sections of this file
			* do after cow.do. This assumes cow.do marks unmatched names with
			* ccode==0 -- TODO confirm.
			tab Country if ccode==0
			drop if ccode==0

			keep ccode year ggexp
			order ccode year
			sort ccode year
			duplicates report ccode year
			browse if ccode==ccode[_n-1] & year==year[_n-1]
			save "temp_OE.dta", replace
 
						***************************
						** Quality of Government **  DONE 
						***************************

		* Build the Islamic-majority dummy and the party-fractionalisation
		* measure from the QoG time-series file; save one row per ccode-year in
		* temp_QG.dta.
		use "qog_std_ts_jan17.dta", clear
 					
		** 118 Islamic majority population **
			
			* Dummy. We code all countries in which more than 50 percent of the population
			* adheres to some sect of Islam as one. Countries where less than half of
			* the population identifies as Muslim are coded zero. We deem this binary 
			* measurement in Muslim majority countries and non-Muslim majority states
			* superior to a graded measure, because there is little variation in the
			* percentage of Muslims within each of the two groups 
				* https://unstats.un.org/unsd/demographic/products/dyb/dyb2.htm
		
			* Linearly interpolate the Muslim share between observed years, by country.
			bysort ccode: ipolate arda_isgenpct year, g(i_pct)		

			* The ">50 percent" rule was applied with a cutoff of .5, which is right
			* only if the share is stored as a proportion. Choose the cutoff from the
			* data: any value above 1 means a 0-100 scale, so the cutoff is 50;
			* otherwise it stays at .5.
			* NOTE(review): confirm the scale of arda_isgenpct in the QoG codebook.
			quietly summarize i_pct
			local cutoff = cond(r(max) > 1, 50, .5)
			gen islamic_majority = (i_pct > `cutoff') if !missing(i_pct)
			label var islamic_majority "118 Country >50% Islamic (QoG)"
  
		** 53. Party fractionalisation
		
			* The probability that two deputies picked at random from the legislature 
			* will be of different parties (1)	
		
			rename dpi_gf party_frac
			label var party_frac "53 Govt Fractionalization Index (DPI)"
			 
			* Replace the file's own ccode with the one assigned by cow.do from the
			* country name.
			drop ccode
			rename cname Country
			run "cow.do"
			tab Country if ccode==0
			drop if ccode ==0
 			duplicates report ccode year
			sort ccode year
			order ccode Country year
			browse if ccode==ccode[_n-1] & year==year[_n-1]
			
			keep ccode year party_frac islamic_majority
			sort ccode year
			duplicates report ccode year 
			* Resolve duplicate ccode-years by taking the maximum of each measure.
			collapse (max) party_frac islamic_majority , by(ccode year)
			* collapse discards variable labels, so they are set again.
			label var party_frac "53 Govt Fractionalization Index (DPI)"	
			label var islamic_majority "118 Country >50% Islamic (QoG)"			
			save "temp_QG.dta", replace
			
		
						****************
						** BANKS DATA ** 
						****************
						
			* Banks Cross-National Time-Series: radios per capita (106),
			* averaged to one row per ccode-year and saved in temp_BA.dta.
			import excel "2015 Edition CNTSDATA.xls", sheet("2015 Edition") cellrange(A2:GM16730) firstrow clear

			keep country year media2

			* cow.do is run on the capitalised Country variable; rows it leaves
			* with ccode 0 are listed and then dropped
			rename country Country
			run "cow.do"
			keep if year>1937
			sort ccode year
			tab Country if ccode==0
			drop if ccode==0

		** 106 # of radios

			rename media2 radios_pc

			** this variable ends in 1999 *

			duplicates report ccode year

			* Full variable name used here (the original relied on the
			* abbreviation "radios", which breaks under -set varabbrev off-
			* or if another radios* variable is ever kept)
			collapse (mean) radios_pc, by(ccode year)
			label var radios_pc "106 Radios, per capita (Banks)"

			* ln() of zero or missing yields missing
			gen radios_pc_ln = ln(radios_pc)
			label var radios_pc_ln "106 Radios, per capita, ln (Banks)"
			save "temp_BA.dta", replace
		
			
	
						***********************
						** Dreher et al 2008 ** DONE
						***********************
	
		* KOF globalisation index: economic globalisation (115), saved in
		* temp_KO.dta. -clear- added so the load does not depend on the data
		* in memory being unchanged since the previous save (consistent with
		* the other sections).
		use "KOFGI_2019_data.dta", clear


		** 115 KOF economic globalisation

			keep code country year KOFEcGI
			rename KOFEcGI econ_global

			* cow.do is run on the capitalised Country variable
			rename country Country
			run "cow.do"
			sort ccode year

			* Inspect, then drop, rows left with ccode 0 by cow.do
			browse if ccode==0
			tab Country if ccode==0
			drop if ccode==0

			* Author's note: 144 duplicate ccode-year rows are reported at this
			* point; the Hong Kong and Macao rows are removed to deal with them
			duplicates report ccode year
			browse if ccode==ccode[_n-1] & year==year[_n-1]
			drop if code=="HKG"
			drop if code=="MAC"

			order ccode
			label var econ_global "115 KOF index of economic globalization (KOF)"
			drop code
			rename Country country

			save "temp_KO.dta", replace
	
		
				*********************************
				*** TRANSPARENCY INTERNATIONAL ** 
				*********************************
	
		* Transparency International CPI, 2012-2018 file: only the variable
		* label is set before the data are re-saved as temp_TI.dta.
		use "cpi_2012_18.dta", clear

			** pre-2012 values are not comparable across time and are dropped **

		** 58 Corruption

			* TI Corruption Perception Index
			label variable cpi "58 Corruption Perceptions Index (TI)"

		save "temp_TI.dta", replace
			 
		
									***********
									** V-DEM ** 
									***********
			
			* V-Dem country-year file: unicameral-legislature dummy (9),
			* saved in temp_VD.dta.
			use "V-Dem-CY-Full+Others-v9.dta", clear

			rename COWcode ccode

		** 9. Unicameralism

			** v2lgbicam

			* 1 when v2lgbicam equals 1, 0 for any other non-missing value.
			* missing() also catches extended missing codes (.a-.z), which the
			* original "==." test would have left coded as 0.
			gen unicameral = (v2lgbicam==1) if !missing(v2lgbicam)
			label var unicameral "9 Unicameral legislature (V-DEM)"

			keep ccode year unicameral
			duplicates report ccode year
			drop if missing(ccode)
			sort ccode year

			* (the unused dup flag that was generated and immediately dropped
			* here has been removed)

			save "temp_VD.dta", replace
			
			
									************
									*** IAEP *** 
									************
			
			* IAEP: federalism (22), executive removal (47), legislature size
			* (82) and the registration variable (15), saved in temp_IA.dta.
			use "IAEPv2_0_2015.dta", clear

		** 22. Federalism

			* dummy (2); trichotomous indicator (2); 1-5 scale (1)

			* 1 when govstruct equals 3, 0 for other non-missing values.
			* This file carries extended missing codes (.a/.e/.f are handled
			* for the chamber sizes below), so missing() is used: the original
			* "==." test left such rows coded 0 instead of missing.
			gen federalism = (govstruct==3) if !missing(govstruct)
			label var federalism "22 Federal system (0/1) (IAEP)"

		** 47. Executive responsiveness

			* whether the legislature can dismiss the executive (1);
			* country is coded as presidential if the government is not subject
			* to a vote of confidence by the Parliament (Presidential=1) (1)

			* Same missing-value treatment as federalism
			gen exec_response = (removeexec==1) if !missing(removeexec)
			label var exec_response "47 Legislature can remove exec (0/1) IAEP)"


		** 82. size of legislature

			* Sum of lower- and upper-chamber seats. Missing values (including
			* extended codes) count as zero unless both chambers are missing,
			* in which case the total is missing.
			egen total = rowtotal(lowcham upcham), missing

			sort ccode year
			gen leg_size_ln = ln(total)
			* Full variable name used (the original abbreviated it to leg_size)
			label var leg_size_ln "82. # seats in legislature (ln) (IAEP)"


		** 15 Automatic (Voluntary registration)
			label var suffreg "15 Is registration a de jure restriction on participation? (IAEP)"

			keep ccode year federalism exec_response leg_size_ln suffreg
			save "temp_IA.dta", replace
			
	 
						*******************
						** ALESINA et al ** 
						*******************
			
					** Alesina et al (Herfindahl index) is old (2003)
					
		* Alesina et al. fractionalisation measures as carried in the QoG
		* file: language (125), religion (126), ethnicity (37); one row per
		* ccode-year saved in temp_AL.dta.
		use "qog_std_ts_jan17.dta", clear

		* Replace the shipped ccode with the one cow.do builds from Country;
		* rows left at ccode 0 are listed and dropped
		drop ccode
		rename cname Country
		run "cow.do"
		tab Country if ccode==0
		drop if ccode==0

		** 125 linguistic / 126 religious / 37 ethnic fractionalisation
		gen fract_lang   = al_language
		gen fract_rel    = al_religion
		gen fract_ethnic = al_ethnic

		keep ccode year fract_lang fract_rel fract_ethnic

		duplicates report ccode year
		sort ccode year

		* Maximum across any duplicate ccode-year rows
		collapse (max) fract_lang fract_rel fract_ethnic, by(ccode year)

		* Labels are assigned after collapse, which replaces variable labels
		label var fract_ethnic "37 Ethnic fractionalisation (Alesina)" 
		label var fract_rel "126 Religion fractionalisation (Alesina)"
		label var fract_lang "125 Language fractionalisation (Alesina)" 

		save "temp_AL.dta", replace
			
				
						*****************
						** ROEDER 2002 ** 
						*****************
						
		* Roeder ELF: expands the two cross-sections (elf61, elf85) into a
		* yearly series and saves it in temp_RO.dta.
		import excel "elf.xls", sheet("Sheet1") cellrange(A2:L185) firstrow case(lower) clear

		** 48.  ELF

			keep ccode country elf61 elf85

			* 57 copies of every row, numbered 1961, 1962, ... within ccode
			expand 57
			bysort ccode: gen year = 1960 + _n

			* 1961 value up to and including 1984, 1985 value afterwards
			gen elf = cond(year < 1985, elf61, elf85)
			label var elf "48 ELF (1961 values until '84, 85 afterwards (Roeder)"

			keep ccode year elf

			save "temp_RO.dta", replace
				
 
						***********************
						** Golden et al 2006 ** 
						***********************
	
		**	Golden, M., Wallerstein, M., 2006. Union centralization among advanced industrial societies: update to 1995/2000
	
				* https://dataverse.harvard.edu/dataset.xhtml?persistentId=hdl:1902.1/10193
				** additional source:  Ebbinghaus, B. Visser, J. 2000. Trade Unions in Western europe since 1945 CD-ROM Supplement Macmillan, London

			* Golden et al. union data: coverage (56) and density (114),
			* saved in temp_GL.dta.
			import delimited "union_centralization.csv", clear 

			* Expand the source's country abbreviations to full names
			* (written as "abbreviation=name") before cow.do is run
			foreach map in "ARL=Australia" "CAN=Canada" "FRA=France" "ITA=Italy" ///
				"NTH=Netherlands" "SPN=Spain" "UK=United Kingdom" "AUT=Austria" ///
				"DNK=Denmark" "GER=West Germany" "JPN=Japan" "NZ=New Zealand" ///
				"SWE=Sweden" "US=United States" "BEL=Belgium" "FIN=Finland" ///
				"IRE=Ireland" "NOR=Norway" "POR=Portugal" "SWI=Switzerland" {
				local cut  = strpos("`map'", "=")
				local abbr = substr("`map'", 1, `cut' - 1)
				local full = substr("`map'", `cut' + 1, .)
				replace country = "`full'" if country == "`abbr'"
			}
			rename country Country

			run "cow.do"

			keep unadjcov Country ccode year totden	

		** 56 Unionisation

			* as a percentage of the labour force. Given similar problems in 
			* cross-national data here, we have made estimates for certain cases, 
			* and rounded the available values to the nearest ten percent,  (1);  
			* a function of the percentage of workers represented by unions in 1975 (1)

			* "NA" strings become missing so the column can be converted to numeric
			replace unadjcov = "" if unadjcov == "NA"
			destring unadjcov, replace
			rename unadjcov unionisation
			label var unionisation "56 Unionisation, filled (Golden & Walrestein 2006)"

			* Carry the last observed value forward within country
			sort ccode year
			by ccode: replace unionisation = unionisation[_n-1] if missing(unionisation) & _n > 1

		** 114. Union density

			replace totden = "" if totden == "NA"
			destring totden, replace
			rename totden union_density
			label var union_density "114 Union density (Golden & Walrestein 2006)"
			sort ccode year


			save "temp_GL.dta", replace
 
						*************************
						** Paxton et al 2003    * 
						*************************
	
			* Paxton et al. suffrage scores: reshaped from one column per year
			* to ccode-year rows and saved in temp_PE.dta.
			import excel "suffrage_data_for_website.xls", sheet("Sheet1") firstrow case(lower) clear

			* Keep the first 199 rows and drop the columns that are not needed
			drop in 200/645
			drop b*   ca cb abbr

			* The year-2000 column (sfrg00) is parked under suffix 49 so that
			* the two-digit suffixes can be mapped to years in one step
			rename sfrg00 sfrg49
			reshape long sfrg, i(name) j(year)
			replace year = cond(year == 49, 2000, 1900 + year)

			rename name Country
			run "cow.do"

			sort ccode year
			drop if ccode==0
			destring sfrg, replace
			order ccode

		** 27 female sufferage

			* Paxton, Pamela, Kenneth A. Bollen, Deborah M Lee and HyoJoung Kim (2003) “Research Forum - A Half-Century of Suffrage: New Data and a Comparative Analysis,” Studies in comparative international development. 38, 1: 93.
			** Mackie, T. Rose, R. 1991. The International Almanac of Electoral History CQ, Press Washington Dc

			** number of years since the introduction of universal suffrage (2); 
			** dummy (1); cumulative female empowerment where franchise is the year women gained the franchise (1);

			** NOTE TO MYSELF--THIS VARIABLE RARELY REACHES 100. MAY NEED TO FIND AN ADDITIONAL SOURCE

			rename sfrg sufferage
			drop Country

			duplicates report ccode year
			sort ccode year

			* Maximum across any duplicate ccode-year rows; label set afterwards
			* because collapse replaces variable labels
			collapse (max) sufferage, by(ccode year)
			label var sufferage "27 Sufferage (Paxton et al 2003)"	

			save "temp_PE.dta", replace
 
							*********************
							*** Robbins et al *** 
							*********************
							
		** 107 -- terrorist attacks
		
		** Those countries that experienced a terrorist attack in the 365 days 
		** preceding an election were coded 1 and 0 otherwise.
	
		** From codebook: ncount365i:  This variable is the dichotomous indicator 
		** employed to test Hypothesis 1 and the data for this were collected 
		** from the Global Terrorism Database (GTD).  Those countries that 
		** experienced a terrorist attack in the 365 days preceding an election were coded 1 and 0 otherwise.
		* Robbins, Hunter and Murray replication data: load only the
		* identifiers and the terrorism indicator (107), save as temp_RH.dta.
		use ccode year ncount365i using "Voters Versus Terrorists Robbins, Hunter, and Murray Replication Data.dta", clear

		label variable ncount365i "107 Terrorist attack in yr bef election (Robbins et al)"
		** less than 500 obs but there are no more to fill in

		save "temp_RH.dta", replace
			
	 
	 
						**********************		
						** STEINER & Martin ** 
						**********************
		* Steiner & Martin replication data: legal voting age (18) and share
		* of the voting-age population aged 30-69 (24), saved in temp_SM.dta.
		use "replication_data_for_Economic_Integration_Party_Polarization_and_Electoral_Turnout.dta", clear

		* countrycode is a labelled numeric; its label text becomes Country,
		* which cow.do then works from
		decode countrycode, generate(Country)
		run "cow.do"	
		tab Country if ccode==0
		sort ccode year
		rename Country country
		drop countrycode
		order ccode country year

		** 18 Legal voting age
		label var votingage "18 Legal voting age (Steiner)"

		** 24 share30to69 of VAP **
		label var share30to69ofVAP "24 Population aged 30-69/VAP (Steiner)"

		keep ccode year share30to69ofVAP votingage
		sort ccode year
		browse if ccode==ccode[_n-1] & year==year[_n-1]	

		** there are 9 duplicates **
		* Values noted by the author for the duplicated rows:
		*	ccode	year	votingage	share30to69ofVAP
		*	200	1974	18	63.94389
		*	205	1982	18	59.75004
		*	220	1946	21
		*	350	1989	18	63.17429
		*	390	1953	23	75.00536
		*	395	1959	21	69.76537
		*	395	2003
		*	666	1959	18	66.38385

		* For these ccode-year pairs every row that follows a row of the same
		* pair is dropped, i.e. the first row in the current sort order is kept
		foreach cy in "200 1974" "205 1982" "220 1946" "350 1989" ///
			"390 1953" "395 1959" "395 2003" "740 2003" {
			tokenize `cy'
			drop if ccode==`1' & year==`2' & ccode[_n-1]==`1' & year[_n-1]==`2'
		}

		* For 666/1959 the test looks at the following row instead, so the
		* last row of the pair is the one kept
		drop if ccode==666 & year==1959 & ccode[_n+1]==666 & year[_n+1]==1959

		browse if ccode==ccode[_n-1] & year==year[_n-1]
		duplicates report ccode year

		save "temp_SM.dta", replace
			
						***********************************************		
						*** Turnout data from Ferran Martinez i Coma ** 
						***********************************************
  
  * Turnout spreadsheet (Ferran Martinez i Coma): competitiveness (3) and
  * short-term majority status (70); legislative elections only, reduced to
  * one row per country-year and saved in temp_FM.dta.
  import excel "turnout.xlsx", sheet("Recovered_Sheet1") firstrow case(lower) clear

		************************
		*x* 3. Competitiveness 

			** generate a new variable that is the difference between p1 and p2

			* Clean the two known bad strings in pctp2 before converting
			destring pctp1, replace
			replace pctp2="" if pctp2=="."
			replace pctp2="25.8" if pctp2=="25..8"
			destring pctp2, replace

			rename pctp1 p1
			rename pctp2 p2

			label var p1 "Vote share of party 1 (FM)"
			label var p2 "Vote share of party 2 (FM)"	

				** Bahrain 1973 is .99 for both p1 & p2 **

				replace p1=. if p1==.99
				replace p2=. if p2==.99	

				/* Ecuador's values dont make sense
				country	year	p1	p2
				Ecuador	2002	1105	9.3 */
				* p2 is blanked first because the test relies on p1 still being 1105
				replace p2=. if p1==1105			
				replace p1=. if p1==1105

				/** These elections had the same value for p1 and p2, might be worth examining 
					country	year
					Kuwait	1999
					Papua New Guinea	1997
					Chile	1973
					Uganda	1980
					Morocco	1993
					Lebanon	2009
					Jordan	1956
					Cape Verde	2001 */

					** LOOK AT US 1978 and Cameroon 1964	

			* (the original -sort competitive- is removed: it had no effect on
			* the values and scrambled the spreadsheet row order that the
			* duplicate handling below relies on)
			gen competitive=p1-p2
			label var competitive "3 Competitiveness (dif.in vote share of top 2 parties) (FM)"

		*****************************************************		
		** 70. short-term majority status of the government
			** use p1-50

			gen maj_status=p1-50
			label var  maj_status "70 Short-term majority status (FM)"		

		rename ccodecow ccode
		* concurrentsamedate written out in full (the original used the
		* abbreviation "concurrent")
		keep ccode country year eltype concurrentsamedate p1 p2 maj_status competitive 

		order ccode country year

		drop if eltype=="Presidential"
		duplicates report ccode year		

		* Rows with neither a code nor a country name
		drop if ccode==. & country==""

		* NOTE: the original patched individual codes here (Angola 540,
		* Cape Verde 402, El Salvador 1952 -> 92, Israel 666, Turkey 640,
		* Andorra 232). ccode is dropped and rebuilt by cow.do further down,
		* so those patches never reached the saved file and are omitted;
		* check that cow.do yields these codes.

		* "Keep the first election of the year": a STABLE sort keeps rows of
		* the same country-year in spreadsheet order, so the row kept is the
		* first one listed in the workbook (a plain -sort- orders ties
		* arbitrarily). Presumably the workbook lists elections
		* chronologically -- confirm.
		sort country year, stable
		browse if ccode==ccode[_n-1] & year==year[_n-1]

		* One pass removes every later row of a duplicated country-year. This
		* covers the cases the original dropped one by one: Haiti 2011,
		* Samoa 1973, Poland 1989, Central African Republic 1993, Greece
		* 1989/2012/2015, United Kingdom 1974, Iceland 1959, Iraq 2005,
		* Ireland 1982, Israel 2019, Sudan 1986, Thailand 1992, Turkey 2015.
		drop if country==country[_n-1] & year==year[_n-1]

		replace concurrentsamedate=0 if concurrentsamedate==. & country=="Turkey" & year==2018

	* Rebuild ccode from the country name
	drop ccode

	rename country Country
	run "cow.do"
	tab Country if ccode==0

	order ccode 

		* NOTE(review): every row cow.do leaves at 0 is assigned 345; check
		* the tabulation above to confirm only the intended country is affected
		replace ccode=345 if ccode==0

	browse if ccode==625
	rename Country country

	* Retained from the original as a safeguard; a no-op while the
	* country-year order from above is still in place
	drop if country==country[_n-1] & year==year[_n-1]

		duplicates report ccode year
		sort ccode year

		browse if ccode==ccode[_n-1] & year==year[_n-1]
		browse if ccode==345

		drop if ccode==345 & year==2003 & concurrentsamedate==.

		save temp_FM.dta, replace		 
		
								**********
								** IDEA ** 
								**********
	* International IDEA turnout file: turnout measures, voting-age
	* population (59), compulsory voting (2) and lagged turnout (28);
	* legislative elections only, saved in temp_ID.dta.
	clear
	import excel "idea turnout 2015.xls", sheet("Recovered_Sheet1") firstrow case(lower)

	** 16/59. (voting age population); 16 (registered voters); 4(both); 3 (other); 1(n/a)
	** 59. cumulative electorate size --- variable name: vap

			 * cow.do is run on the capitalised Country variable; rows left at
			 * ccode 0 are listed and dropped
			 rename country Country
			 run "cow.do"
			 rename Country country
			 tab country if ccode==0

			 drop if ccode==0

		order ccode country year eltype

		label var vt "Voter turnout, number voters (IDEA)"

		label var vote "# voters who voted (IDEA)"
		label var vapvt "Voter turnout, % voting age population (IDEA)"
		label var vap "59 Voting age population (IDEA)"	
		label var reg "Registered voters (IDEA)"

		gen ln_vap=ln(vap)
		label var ln_vap "59 Voting age population, ln (IDEA)"

	*x* 2.  Compulsory voting is variable name "comp"

		* Yes/No strings recoded to 1/0 and converted to numeric
		replace comp="0" if comp=="No"
		replace comp="1" if comp=="Yes"
		destring comp, replace
		label var comp "2 Compulsory voting (IDEA)"
		label var invot "Invalid votes (IDEA)"

		drop pop fhav fhpr fhcl
		sort ccode year

		* Keep legislative elections only
		drop if eltype=="EU Parliament"

		tab eltype
		drop if eltype=="Presidential"
		drop eltype

		duplicates report ccode year


		** 28. Previous turnout **

		** turnout at t-1 **
		** to do after we decide on what the DV operationalisation is 

		* -bysort ccode (year)- states the within-country year ordering in the
		* command itself, so the lag no longer depends on an earlier -sort-
		* being left intact. Rows sharing a ccode-year are still ordered
		* arbitrarily relative to each other.
		bysort ccode (year): gen vt_lag=vt[_n-1]
		label var vt_lag "28 Voter turnout lagged, # voters in prev election (IDEA)"

		bysort ccode (year): gen vapvt_lag=vapvt[_n-1]
		label var vapvt_lag "28 Voter turnout lagged, % VAP (IDEA)"



		save "temp_ID.dta", replace
		
	
						*******************
						** 2.5. Lijphart ** 
						*******************
	
		** Lijphart spreadsheet has other data, Just focused on what we needed here 

		** THESE data are mostly stable ones so I want to collapse to CYR 
		** in order to make them possible to merge **

	
		* Lijphart (2012): effective number of parties (4) and legislative
		* disproportionality (12), averaged to one row per ccode-year and
		* saved in temp_LI.dta.
		import excel "Lijphart 2012.xlsx", sheet("elec disproport") firstrow clear
		rename country Country

		run "cow.do"
		tab Country if ccode==0
		sort Country
		drop if ccode==0

		* 4 Effective number of political parties
		* 12 Disproportionality
			* mean values used if more than one column for partic variables
		rename (ENPP Disprop Presdisprop) (enpp_l disprop_leg disprop_pres)
		label var enpp_l "4 Effective number of political parties (Lijphart 12)"
		label var disprop_leg "12 Legislative disproportionality (Lijphart 12)"
		label var disprop_pres "Presidential disproportionality (Lijphart 12)"

		rename electionyear year
		sort year

		* Some election years are stored as "Mon-YY" text. Each entry below is
		* "text month year": the month goes into month and the text is
		* replaced by the four-digit year.
		gen month=.
		foreach fix in "Apr-53 4 1953"  "Dec-92 12 1992" "Feb-74 2 1974"  ///
			"Feb-82 2 1982"  "Jun-02 6 2002"  "Jun-07 6 2007"  "Jun-59 6 1959"  ///
			"Jun-81 6 1981"  "Jun-88 6 1988"  "Jun-89 6 1989"  "May-02 5 2002"  ///
			"May-07 5 2007"  "May-81 5 1981"  "May-88 5 1988"  "Nov-82 11 1982" ///
			"Nov-89 11 1989" "Mar-92 3 1992"  "Oct-59 10 1959" "Oct-74 10 1974" ///
			"Sep-53 9 1953" {
			tokenize `fix'
			replace month = `2' if year == "`1'"
			replace year = "`3'" if year == "`1'"
		}

		* A two-year entry is assigned to its later year
		replace year="1951" if year=="1948/1951"

		destring year, replace
		sort ccode year

		* Mean across rows sharing a ccode-year; only the two listed variables
		* survive, and labels are reset because collapse replaces them
		collapse (mean) enpp_l disprop_leg , by(ccode year)
		label var disprop_leg "12 Legislative disproportionality (Lijphart 12)"
		label var enpp_l "4 Effective number of political parties (Lijphart 12)"

		save "temp_LI.dta", replace
		
						
		
*******************************************************************************

					*******************************
					** MERGING IN ALL TEMP FILES **
					*******************************
 	
	* Combine all country-year temp files into one dataset keyed on ccode
	* and year. temp_AL.dta is the starting (master) file; each remaining
	* file is merged 1:1 in the order listed, and the _merge indicator is
	* not kept.
	clear


				** YEARLY DATA **

		use temp_AL.dta, clear
		foreach src in BA FH FM GL IA ID KO LI OE PE PO QG RO RH SM TI UN VD WB {
			merge 1:1 ccode year using temp_`src'.dta, nogenerate
		}

		* Two name variables arrive from different sources (Country and
		* country): fill blanks in country from Country, then keep country only
		order ccode Country country year
		replace country = Country if missing(country) & !missing(Country)
		drop Country
		label var ccode "COW country code"
	


					************************
					** 4. DUMMY VARIABLES **
					************************
					
	
		* Single-country indicators: 1 for the listed COW code, 0 for every
		* other row (including rows where ccode is missing).

		** 11. Switzerland dummy
			gen swiss = (ccode==225)
			label var swiss "11 Switzerland dummy"

		** 21. USA dummy 
			gen usa = (ccode==2)
			label var usa "21 USA dummy"

		** 77. Sweden dummy
			gen sweden = (ccode==380)
			label var sweden "77 Sweden dummy"

		** 78. New Zealand	
			gen nz = (ccode==920)
			label var nz "78 New Zealand dummy"

		** 79. Norway
			gen norway = (ccode==385)
			label var norway "79 Norway dummy"
		
							***************
							*** REGIONS ***
							***************
							
		* Region indicators built from the region variable (presumably
		* created by wb_region.do -- confirm) and from COW codes. Each is 1
		* when its condition holds and 0 otherwise, including when region or
		* ccode is missing.
		run "wb_region.do"

		** 40. Latin America dummy
			* region 9, with ccode 2 and 20 forced to 0
			gen latin_america = (region==9) & !inlist(ccode, 2, 20)
			label var latin_america "40 Latin America & Caribbean region dummy"

		** 41. Africa dummy
			gen africa = inlist(region, 8, 2, 1)
			label var africa "41 Africa region dummy"

		** 42. Asia dummy
			* regions 3 and 4, excluding Australia (900), NZ (920), PNG (910),
			* Solomon Islands (940) and Samoa (990)
			gen asia = inlist(region, 3, 4) & !inlist(ccode, 900, 920, 910, 940, 990)
			label var asia "42 Asia region dummy"

		** 43. Oceania dummy
			* Australia (900), NZ (920), PNG (910), Solomon Islands (940), Samoa (990)
			gen oceania = inlist(ccode, 900, 920, 910, 940, 990)
			label var oceania "43 Oceania region dummy"

		** 62. South America dummy
			* COW codes above 99 and below 200
			gen samerica = (ccode>99 & ccode<200)
			label var samerica "62 South America region dummy"

		** 120 Eastern Europe	
			gen eeurope = (region==5)
			label var eeurope "120 Eastern & Central Europe region dummy"

		** 101 OECD dummy
			* the oecd variable labelled here is expected to come from oecd.do
			run "oecd.do"

			label var oecd "101 OECD mem (Yr(t) if joined t-1)"
			
		** 44 West 
		** DEFINITION FROM STOCKEMER AND SCRUGGS IN ES 2012
		**  According to this definition we label the following countries as western: Andorra, Australia, Austria, Belgium, Canada, Cyprus, Denmark, Finland, France, Germany, Greece, Iceland, Ireland, Israel, Italy, Liechtenstein, Luxembourg, Malta, Monaco, the Netherlands, New Zealand, Norway, Portugal, Spain, Sweden, Switzerland, the UK, and the USA. Due to missing data for inequality, Liechtenstein and Monaco were dropped from the regression analysis.
			* 1 for the COW codes listed below, 0 for all other rows.
			* Author's country list, in order: Andorra, Australia, Austria,
			* Belgium, Canada, Cyprus, Denmark, Finland, France, Germany,
			* Greece, Iceland, Ireland, Israel, Italy, Liechtenstein,
			* Luxembourg, Malta, Monaco, the Netherlands, New Zealand, Norway,
			* Portugal, Spain, Sweden, Switzerland, the UK, the USA.
			* NOTE(review): Germany is matched on 255 only; confirm how cow.do
			* codes West Germany.
			gen west = inlist(ccode, 232, 900, 305, 211, 20, 352, 390, 375, ///
				220, 255, 350, 395, 205, 666, 325, 223, 212, 338, 221, 210, ///
				920, 385, 235, 230, 380, 225, 200, 2)

			label var west "44 Western country (Stockemer & Scruggs 2012)"
	 
	 
	 
							******************
							** TIME DUMMIES **
							******************
		* Period dummies and linear time trend, then save of the country-year
		* file. Rows before 1944 are removed; rows with a missing year are
		* not removed by this test (missing sorts above every number).
		drop if year<1944

		** 80. 1919-1994 years dummy

		* Missing when year is missing (the original coded such rows 0)
		gen y1919_1994 = (year>1918 & year<1995) if !missing(year)
		label var y1919_1994 "80 Dummy for years between 1919 & 1994"

		** 81. 1945 onwards year dummy 

		* Guarded against missing year: "year>1944" is true for a missing
		* value, so the original coded rows without a year as post-war
		gen post_ww2 = (year>1944) if !missing(year)
		label var post_ww2 "81 Dummy for years after 1944"

		** 23. Time trends

		* The trend variable is equal to 1 for the elections that occurred in the
		* first year of our sample, that is, in 1986 and to 25 in 2010. (1);
		* For each country, the first parliamentary election associated with the
		* country's Third Wave transition to democracy is scored as a 1, the second
		* election as 2, the third election as 3, and the fourth election as 4. (1);
		* This time variable serves to control for the trend towards lower turnout
		* that all democratic countries (Western and nonWestern) have experienced
		* during the last four decades (1); Our time variable captures this continuous
		* decrease and therefore picks up some of the unexplained variance in the model.
		* (1); To capture some of the variance over time and to allow for a purer
		* identification of the independent variables or fixed effects, we include a
		* time variable coded zero for 1970 and 40 for 2010 to both models. Substantively,
		* this time trend accounts for the decline in turnout that developed and,
		* to a lesser degree, developing countries have experienced over the past decades. (1)

		* 1945 = 1; the retained 1944 rows therefore get 0
		gen time_trend= year-1944
		label var time_trend "23 Linear time trend (1945=1)"



		order _all, alphabetic 
		order ccode country year
		label data "EBA Country-year data"	
		save merged_cyr.dta, replace
	

********************************************************************************
				*************************
				** ELECTION-LEVEL DATA **
				*************************

						****************
						** 2.2. NELDA ** 
						****************
								
		* NELDA election-level file: harassment (127), boycott (129),
		* violence (130), opposition ban (128) and the running count of
		* legislative elections (38).
		use "id & q-wide_share.dta", clear


		** 127.	Opposition harrassment  /  129. boycott

			* yes/no answers become 1/0; "unclear" and "N/A" become missing
			foreach q of varlist nelda15 nelda14 {
				replace `q' = "0" if `q' == "no"
				replace `q' = "1" if `q' == "yes"
				replace `q' = ""  if inlist(`q', "unclear", "N/A")
				destring `q', replace
			}

			rename nelda15 harassment 
			label var harassment "127 Opp. harassment (NELDA)"	

			rename nelda14 boycott
			label var boycott "129 Boycott (NELDA)"

		** 130.	violence

			* 1 when any of nelda33, nelda31, nelda29 is "yes"; 0 otherwise
			* (other answers, including blanks, count as 0)
			gen violence = (nelda33=="yes" | nelda31=="yes" | nelda29=="yes")
			label var violence "130 Nelda violence, nelda33,31,29 (NELDA)"

		** 128.	Opposition ban

			gen ban = (nelda13=="yes")
			label var ban "128 Opposition leaders banned (NELDA)"


		*x* 7. Concurrent -- concurrentsamedate

			* Flag rows that share ccode, year and mmdd with the row directly
			* before or after. Computed before restricting to legislative
			* elections so that other election types count as the partner.
			* NOTE(review): concurrent is not in the keep list below, so it is
			* not carried into the saved file -- confirm this is intended.
			sort ccode year mmdd
			gen concurrent = (ccode==ccode[_n-1] & year==year[_n-1] & mmdd==mmdd[_n-1]) | ///
				(ccode==ccode[_n+1] & year==year[_n+1] & mmdd==mmdd[_n+1])

			label var concurrent  "7 Concurrent elections on same date (NELDA)"

		keep if types=="Legislative/Parliamentary"


		** 38. # of elections

			* number of elections that have occurred since the reestablishment of democracy (1); 
			* number of elections since independence through 1989(1); 
			* number of multiparty legislative elections (1); 

			* Running count of legislative elections within country, in date order
			sort ccode year mmdd	
			by ccode: gen num_elections = _n

			label var num_elections "38 # leg elections since 1945 (NELDA)"

		keep ccode country year mmdd electionid harassment boycott violence num_elections ban
		 
		 
		* Election-order dummies derived from num_elections: each is 1 when
		* the running count equals (one of) the stated value(s), 0 otherwise.

		** 66. 2nd election			
			gen election_2 = (num_elections==2)
			label var election_2 "66 Second election, dummy (NELDA)"

		** 49. 3rd election
			gen election_3 = (num_elections==3)
			label var election_3 "49 Third election, dummy (NELDA)"			

		** 50. 4th election
			gen election_4 = (num_elections==4)
			label var election_4 "50 Fourth election, dummy (NELDA)"			

		** 67. 3rd and 4th
			gen election_3_4 = inlist(num_elections, 3, 4)
			label var election_3_4 "67 Third & fourth elections, dummy (NELDA)"

		** 68. 5th and 6th election
			gen election_5_6 = inlist(num_elections, 5, 6)
			label var election_5_6 "68 Fifth & sixth elections, dummy (NELDA)"		

		** 102 7th and 8th election
			gen election_7_8 = inlist(num_elections, 7, 8)
			label var election_7_8 "102 Seventh & eighth elections, dummy (NELDA)"				
 
		duplicates report electionid
		duplicates report ccode year
		duplicates report ccode year mmdd
		browse if ccode==ccode[_n-1] & year==year[_n-1] & mmdd==mmdd[_n-1]

	* Poland 1989
		* electionid=="290-1989-0604-L2 
		
		browse if country=="Poland" & year==1989
		drop if electionid=="290-1989-0604-L2"
		
	* Egypt 2011 	651-2011-1205-L2"
		browse if country=="Egypt" & year==2011	
		drop if electionid=="651-2011-1128-L2"	
	
	 
* Monaco	221-1973-0204-L1	1973
		browse if country=="Monaco" & year==1973	
		drop if electionid=="221-1973-0204-L2"		
		
* Kiribati	970-2007-0822-L2	2007
		browse if country=="Kiribati" & year==2007	
		drop if electionid=="970-2007-0822-L2"	
		
		
		save "temp_NE.dta", replace

		
********************************************************************************
	
						
						*****************
						** 2.6. Golder ** 
						*****************
	
		** V3 just came out in 2017
				** 134 countries but no more than 51 countries in sample per year

		use "es_data-v3.dta", clear

		* month and day arrive as strings; blank the two text codes so that
		* both can be converted to numbers
		replace month="" if inlist(month, "date unknown", "unopposed")
		replace day="" if inlist(day, "date unknown", "unopposed")

		destring month day, replace

		* mmdd = month*100 + day; missing whenever month or day is missing
		generate mmdd = (month*100) + day
		sort mmdd
		browse mmdd month day
		
		** 5. Proportional representation
		*   legislative_type is compared as a string: "2" sets pr, "1" sets
		*   majoritarian, "3" sets mixed. Any other value (including blank)
		*   leaves all three dummies at 0.

			tabulate legislative_type
			generate pr = (legislative_type=="2")
			label variable pr "5 Proportional representation (Golder)"

		** 26 Majority

			generate majoritarian = (legislative_type=="1")
			label variable majoritarian "26 Majoritarian electoral system (Golder)"

		** 31 Mixed/34 semi presidential systems

			generate mixed = (legislative_type=="3")
			label variable mixed "31 34 Mixed electoral system (Golder)"
			
			
		** 14. District magnitude

			* average number of candidates elected from an electoral district (4);
			* median district magnitude (1);
			* the total number of seats allocated in the lowest tier divided by
			* the total number of districts in that tier in 2000 (1);
			* the size of electoral districts in terms of the number of seats assigned to each district (1);
			* an ordinal variable representing the extent to which electoral districts
			* approximate pure proportional representation (1);

			* "NA" strings are blanked so the column can be converted to numeric
			replace tier1_avemag="" if tier1_avemag=="NA"
			rename tier1_avemag dist_mag
			label variable dist_mag "14 District magnitude (Golder)"
			destring dist_mag, replace

			* the numeric codes -99 and -88 are set to missing
			replace dist_mag = . if inlist(dist_mag, -99, -88)
		
		** 25. Plurality

			* dummy which equals 1 when the election was held under plurality rule (2);
			* dummy if the election is held using single member district plurality electoral rules,
			* and 0 otherwise (1);
			* Presidential electoral rules is a categorical measure that distinguishes
			* elections under plurality electoral rules, first-round elections in runoff systems,
			* and second-round elections in runoff systems, with three dummy variables.
			* Because we expect turnout in plurality and second-round elections to be
			* significantly lower than that in first-round elections (but not particularly
			* different from each other), we use the first-round election variable
			* as the excluded category (1)

			** I'm using the definitions for the first tier, which I think is what we want
			** for lower house

			* "NA" strings are blanked so the formula code can be converted to numeric
			replace tier1_formula="" if tier1_formula=="NA"
			destring tier1_formula, replace

			* plurality = 1 for first-tier formula codes 1, 2, 6, 7, 8, 9, 10 and 28;
			* 0 for every other value, including missing
			generate plurality = inlist(tier1_formula, 1, 2, 6, 7, 8, 9, 10, 28)
			label variable plurality "25 Plurality system (Golder)"
			
			* 4. Effective number of political parties
			*    "NA" strings and the numeric code -99 both end up as missing.

			replace enpp="" if enpp=="NA"
			destring enpp, replace
			replace enpp = . if enpp==-99
			label variable enpp "4 Effective # of (leg) political parties (Golder)"

		** 84. Effective number of political parties (squared) **
			** should also be in Q of G **

			generate enpp_sq = enpp*enpp
			label variable enpp_sq "84 Effective # of (leg) political parties, squared (Golder)"
			
 	
		sort ccode year

		* rows with presidential==1 are removed; everything else is kept
		keep if presidential!=1
		duplicates report ccode year

		  ** 683 duplicates **

		* keep only the merge keys and the Golder variables built above
		keep ccode year mmdd month day ///
			pr dist_mag plurality mixed majoritarian legislative_type ///
			enpp enpp_sq

		order ccode year month day mmdd

		save "temp_GO.dta", replace
		
		
********************************************************************************
********************************************************************************
********************************************************************************
 
		*************************************
		** MERGING ELECTION_LEVEL DATASETS **
		*************************************
		
		** Only running models on democracies and elections for lower house.
		** This is because the DV is two rows for mixed elections 
	 			
				
	** I NEED TO CREATE AN ELECTION ID TO MERGE THESE TWO DATA SOURCES **
	 
		use temp_NE.dta, clear

		* NELDA rows (master) joined to Golder rows (using) on country code,
		* year and month-day. Unmatched rows from both files are retained and
		* no _merge indicator is left behind.
		merge 1:1 ccode year mmdd using temp_GO.dta, nogenerate

		** 1185 elections merged **
		order ccode country electionid year month day mmdd

		save merged_election.dta, replace
	
		** NEED LIST OF FIRST ELECTIONS TO CREATE VAR 105 **
		browse ccode country year if ccode~=ccode[_n-1]

	*** TO GENERATE AFTER MERGING ***

		sort ccode year mmdd

		** 65. Time since previous election
			* year minus the year of the preceding row of the same ccode;
			* missing on the first row of each ccode

			by ccode: generate prev_el_time = year - year[_n-1]
			label variable prev_el_time "65 Years since previous election (generated)"


		** merging in cyr data **
		* many election rows may share one country-year row of merged_cyr.dta;
		* unmatched rows from both sides are retained
		merge m:1 ccode year using "merged_cyr.dta", nogenerate
		sort ccode year

	*** LAST CLEANING AND ORGANIZING ***

		drop month day eltype

		label var ccode "COW country code"
		label var electionid "NELDA election id"
		label var year "Year"
		label var mmdd "Election month and day"

	** GENERATING A FEW LAST VARIABLES **

		* Natural logs; ln() returns missing for non-positive or missing input
		gen popl=ln(population)
		label var popl "8 Poplation (ln)"

		gen enppl=ln(enpp)
		label var enppl "4 Effective # of (leg) political parties, ln (Golder)"

		gen enpp_sql=ln(enpp_sq)
		label var  enpp_sql  "84 Effective # of (leg) political parties, squared ln (Golder)"

		* Regime-type dummies from polity2:
		*   democracy = 1 if polity2 > 5
		*   autocracy = 1 if polity2 < -5
		*   anocracy  = 1 if -5 <= polity2 <= 5
		* Each dummy is missing whenever polity2 is missing. missing() is used
		* rather than a comparison with "." because extended missing values
		* (.a-.z) are not equal to "." and sort above every number, so they
		* would otherwise be coded democracy = 1.
		gen democracy = (polity2 > 5) if !missing(polity2)
		gen autocracy = (polity2 < -5) if !missing(polity2)
		gen anocracy = inrange(polity2, -5, 5) if !missing(polity2)


		label data "EBA turnout data (Ferran & Rich)"
		save turnout_eba_merged.dta, replace
		* second copy in Stata 13 format
		saveold turnout_eba_merged_13.dta, version(13) replace
						
					*******************************	 
					** STANDARDIZING AND LAGGING **
					*******************************
					
		drop if year==2019
		drop post_ww2 p1 p2 invot
		sort ccode year

		***************************
		** Socioeconomic factors **
		***************************

		* Recipe for the time-varying measures in this section: z-score the
		* variable over all rows in memory (egen std), then take the value from
		* the preceding row of the same ccode as the "lag".
		* NOTE(review): the lag is the previous ROW within ccode, which is the
		* previous calendar year only if rows are consecutive, unique years -- confirm.

		** 30 GNI **

			browse country year vt vapvt gni_ln
			egen gni_ln_s = std(gni_ln)
			by ccode: generate gni_ln_sl = gni_ln_s[_n-1]
			label variable gni_ln_sl "30 GNI per cap, ln, std, lag (const 2010 USD) (WB)"

			* coverage checks: distribution, then countries and years with data
			summarize gni_ln_sl
			codebook ccode if gni_ln_sl != .
			codebook year if gni_ln_sl != .
			drop gni_ln_s gni_ln gni_pc_constant gdp_pc_ppp

		** 8 Population *

			browse country year vt vapvt popl population
			egen pop_ln_s = std(popl)
			by ccode: generate pop_ln_sl = pop_ln_s[_n-1]
			label variable pop_ln_sl "8 Population, ln, std, lag (WB)"
			summarize pop_ln_sl
			codebook ccode if pop_ln_sl != .
			codebook year if pop_ln_sl != .
			drop popl population pop_ln_s

		** 60 GNI per cap growth  *

			browse country year vt vapvt gni_pc_growth
			egen gni_pc_growth_s = std(gni_pc_growth)
			by ccode: generate gni_pc_growth_sl = gni_pc_growth_s[_n-1]
			label variable gni_pc_growth_sl "60 GNI pc growth (annual %) std, lag (WB)"
			summarize gni_pc_growth_sl
			codebook ccode if gni_pc_growth_sl != .
			codebook year if gni_pc_growth_sl != .
			drop gni_pc_growth gni_pc_growth_s
		
		** 16 Urban population *
		*   standardise over all rows, then lag by one row within ccode

			browse country year vt vapvt urban_pop_pct
			egen urban_pop_pct_s = std(urban_pop_pct)
			by ccode: generate urban_pop_pct_sl = urban_pop_pct_s[_n-1]
			label variable urban_pop_pct_sl "16 Urban population, % total, std, lag (WB)"
			summarize urban_pop_pct_sl
			codebook ccode if urban_pop_pct_sl != .
			codebook year if urban_pop_pct_sl != .
			drop urban_pop_pct urban_pop_pct_s

		** 20 Inequality, Gini index
		*   gini is first linearly interpolated on year within each ccode
		*   (igini); the interpolated series is what gets standardised and lagged

			browse country year vt vapvt gini
			by ccode: ipolate gini year, generate(igini)
			egen gini_s = std(igini)
			by ccode: generate gini_sl = gini_s[_n-1]
			label variable gini_sl "20 GINI (Inequality proxy), interplolated std, lag (WB)"
			summarize gini_sl
			codebook ccode if gini_sl != .
			codebook year if gini_sl != .
			drop gini igini gini_s


		** 29 Population density,people per sq.km land area (WB)

			browse country year vt vapvt pop_density_ln
			egen pop_density_ln_s = std(pop_density_ln)
			by ccode: generate pop_density_ln_sl = pop_density_ln_s[_n-1]
			label variable pop_density_ln_sl "29 Population density, people per sq.km land area, ln, lag, std (WB) "
			summarize pop_density_ln_sl
			codebook ccode if pop_density_ln_sl != .
			codebook year if pop_density_ln_sl != .
			drop pop_density pop_density_ln pop_density_ln_s
 	
		** 32 Life expectancy

			sort ccode year
			browse country year ccode vt vapvt life_exp
			** China had values above 100 **
			* any value above 90 is set to missing before standardising
			replace life_exp = . if life_exp > 90 & life_exp != .
			egen life_exp_s = std(life_exp)
			by ccode: generate life_exp_sl = life_exp_s[_n-1]
			label variable life_exp_sl "32 Life expectancy at birth, tot yrs, lag, std (WB) "
			summarize life_exp_sl
			codebook ccode if life_exp_sl != .
			codebook year if life_exp_sl != .
			drop life_exp_s life_exp

		** 37 Ethnic fractionalisation (doesnt vary by year)
		*   standardised only; no lag is taken

			sort ccode year
			browse country year ccode vt vapvt fract_ethnic
			egen fract_ethnic_s = std(fract_ethnic)
			label variable fract_ethnic_s "37 Ethnic fractionalisation, std (Alesina)"
			summarize fract_ethnic_s
			codebook ccode if fract_ethnic_s != .
			codebook year if fract_ethnic_s != .
			drop fract_ethnic

		** 45 HDI Index **
		*   standardised only; no lag is taken

			sort ccode year
			browse country year ccode vt vapvt  hdi_index
			egen hdi_index_s = std(hdi_index)
			label variable hdi_index_s "45 HDI Index, std (UNDP) "
			summarize hdi_index_s
			codebook ccode if hdi_index_s != .
			codebook year if hdi_index_s != .
			drop hdi_index
		
		** 51 Inflation **
		*   logged, standardised, then lagged one row within ccode.
		*   ln() returns missing where inflation is zero or negative.

			sort ccode year
			browse country year ccode vt vapvt  inflation
			generate inflation_ln = ln(inflation)
			egen inflation_ln_s = std(inflation_ln)
			by ccode: generate inflation_ln_sl = inflation_ln_s[_n-1]
			label variable inflation_ln_sl " 51 Inflation, consumer price, ln, std, lag (WB) "
			summarize inflation_ln_sl
			codebook ccode if inflation_ln_sl != .
			codebook year if inflation_ln_sl != .
			drop inflation inflation_ln inflation_ln_s

		** 52 Unemployment **


			sort ccode year
			browse country year ccode vt vapvt  unemployment
			egen unemployment_s = std(unemployment)
			by ccode: generate unemployment_sl = unemployment_s[_n-1]
			label variable unemployment_sl "52 Unemployment, total, %labor force, std, lag (WB)"
			summarize unemployment_sl
			codebook ccode if unemployment_sl != .
			codebook year if unemployment_sl != .
			drop unemployment_s unemployment

		** 75 Subnational revenue **
		*   logged, standardised, then lagged one row within ccode

			sort ccode year
			browse country year ccode vt vapvt  subnat_rev
			generate subnat_rev_ln = ln(subnat_rev)
			egen subnat_rev_s = std(subnat_rev_ln)
			by ccode: generate subnat_rev_sl = subnat_rev_s[_n-1]
			label variable subnat_rev_sl "75 Sub-national Revenues (% of total revenues), ln, std, lag  (WB)"
			summarize subnat_rev_sl
			codebook ccode if subnat_rev_sl != .
			codebook year if subnat_rev_sl != .
			drop subnat_rev_s subnat_rev_ln subnat_rev
		
		
		** 74 subnat_exp **
		*   Sub-national expenditure: logged, standardised over all rows, then
		*   lagged one row within ccode.

			sort ccode year
			browse country year ccode vt vapvt  subnat_exp
			gen subnat_exp_ln=ln(subnat_exp)
 			egen subnat_exp_s=std(subnat_exp_ln)
			by ccode: gen subnat_exp_sl=subnat_exp_s[_n-1]
			* The label used to be a copy of the revenue label (variable 75);
			* it now names the expenditure measure (variable 74).
			label var subnat_exp_sl "74 Sub-national Expenditures (% of total expenditures), ln, std, lag  (WB)"
			sum subnat_exp_sl
			codebook ccode if subnat_exp_sl~=.
			codebook year if subnat_exp_sl~=.
			drop  subnat_exp subnat_exp_ln subnat_exp_s
				
		** 101 OECD ***
		*   The existing oecd variable is discarded and an external do-file is run;
		*   the label command below requires that do-file to leave a variable
		*   named oecd in memory.
		*   NOTE(review): the path is absolute and machine-specific.
			browse country year ccode vt vapvt oecd
			drop oecd
			run "/Users/rich/Dropbox/Data/OECD/do/Creating OECD membership variable.do"
			label variable oecd "101 OECD mem (Yr(t) if joined t-1)"
			summarize oecd
			codebook ccode if oecd != .
			codebook year if oecd != .


		** 106 Radios per capita

			sort ccode year
			browse country year ccode vt vapvt  radios_pc_ln
			egen radios_pc_ln_s = std(radios_pc_ln)
			by ccode: generate radios_pc_ln_sl = radios_pc_ln_s[_n-1]
			label variable radios_pc_ln_sl "106 Radios, per capita, ln, std, lag (Banks)"
			summarize radios_pc_ln_sl
			codebook ccode if radios_pc_ln_sl != .
			codebook year if radios_pc_ln_sl != .
			drop radios_pc_ln_s radios_pc_ln radios_pc

		** 115 KOF economic globalization


			sort ccode year
			browse country year ccode vt vapvt  econ_global
			egen econ_global_s = std(econ_global)
			by ccode: generate econ_global_sl = econ_global_s[_n-1]
			label variable econ_global_sl "115 KOF index of economic globalization, std, lag (KOF)"
			summarize econ_global_sl
			codebook ccode if econ_global_sl != .
			codebook year if econ_global_sl != .
			drop econ_global_s econ_global
		
		** 118 Islamic majority
		*   inspection only: shows rows coded 1 whose preceding row is coded 0

			sort ccode year
			browse country year ccode vt vapvt  islamic_majority if islamic_majority==1 & islamic_majority[_n-1]==0
			** there are a few countries that flip back and forth **

		** 125 fract_lang
		*   standardised only; no lag is taken

			sort ccode year
			browse country year ccode vt vapvt  fract_lang
			egen fract_lang_s = std(fract_lang)
			label variable fract_lang_s "125 Language fractionalisation, std (Alesina)"
			summarize fract_lang_s
			codebook ccode if fract_lang_s != .
			codebook year if fract_lang_s != .
			drop fract_lang

		** 126 fract_rel
		*   standardised only; no lag is taken

			sort ccode year
			browse country year ccode vt vapvt  fract_rel
			egen fract_rel_s = std(fract_rel)
			label variable fract_rel_s "126 Religion fractionalisation, std (Alesina) "
			summarize fract_rel_s
			codebook ccode if fract_rel_s != .
			codebook year if fract_rel_s != .
			drop fract_rel
		
		***************************
		** Institutional factors ** 
		***************************
		
		** 5 Proportional representation
		*   coverage checks only; the dummy is left as it is

			* there are some with elections but no PR coded, I tried to fix this
			sort ccode year
			browse country year ccode vt vapvt  pr
			summarize pr
			codebook ccode if pr != .
			codebook year if pr != .

		** 2 Compulsory voting

			summarize comp
			codebook ccode if comp != .
			codebook year if comp != .

		** Concurrent

			summarize concurrentsamedate
			codebook ccode if concurrentsamedate != .
			codebook year if concurrentsamedate != .

		** 4 Effective number of political parties
		*   inspect the raw measure, then replace it with its z-score
			summarize enpp
			codebook enpp
			codebook ccode if enpp != .
			codebook year if enpp != .
			egen enpp_s = std(enpp)
			label variable enpp_s "4 Effective # of (leg) political parties, std (Golder)"
			summarize enpp_s
			codebook ccode if enpp_s != .
			codebook year if enpp_s != .
			drop enpp

		** 84 Eff num parties squared
			summarize enpp_sq
			egen enpp_sq_s = std(enpp_sq)
			label variable enpp_sq_s "84 Effective # of (leg) political parties, squared, std (Golder)"
			summarize enpp_sq_s
			codebook ccode if enpp_sq_s != .
			codebook year if enpp_sq_s != .
			drop enpp_sq
 	
		** 9 Unicameral legislature (V-DEM)

			sum unicameral

		** 14 District magnitude
		*   The raw measure is replaced by its z-score over all rows in memory.

 			sum dist_mag
			egen dist_mag_s=std(dist_mag)
			* label now says "std", in line with the other standardised (_s)
			* variables; before, it was identical to the raw variable's label
			label var dist_mag_s "14 District magnitude, std (Golder)"
			sum dist_mag_s
			codebook ccode if dist_mag_s~=.
			codebook year if dist_mag_s~=.
			drop dist_mag
 
		** 15 Is registration a de jure rest. on part.?, std (IAEP)"

			summarize suffreg

		** 22 Federalism

			summarize federalism
			codebook ccode if federalism != .
			codebook year if federalism != .

		** 25 plurality

			summarize plurality
			codebook ccode if plurality != .
			codebook year if plurality != .

		** 26 Majority

			summarize majoritarian
			codebook ccode if majoritarian != .
			codebook year if majoritarian != .

		** 27 Female sufferage
		*   the raw measure is replaced by its z-score

			summarize sufferage
			browse country year ccode vt sufferage
			egen sufferage_s = std(sufferage)
			label variable sufferage_s "27 Sufferage, std (Paxton et al 2003)"
			summarize sufferage_s
			codebook ccode if sufferage_s != .
			codebook year if sufferage_s != .
			drop sufferage

		** 31 Mixed

			summarize mixed
			codebook mixed
			codebook ccode if mixed != .
			codebook year if mixed != .

		** 53 Govt fractionalisation
		*   inspect the raw index, then replace it with its z-score

			summarize party_frac
			codebook party_frac
			browse country year ccode vt party_frac
			egen party_frac_s = std(party_frac)
			label variable party_frac_s "53 Govt Fractionalization Index, std (DPI)"
			summarize party_frac_s
			codebook ccode if party_frac_s != .
			codebook year if party_frac_s != .
			drop party_frac
		
				
		** 82 Size of leg
		*   Inspect the logged seat count, then replace it with its z-score
		*   over all rows in memory.

		sum leg_size_ln
		codebook leg_size_ln
		codebook ccode if leg_size_ln~=.
		codebook  year if leg_size_ln~=.
		browse country year ccode vt leg_size_ln
		egen leg_size_ln_s=std(leg_size_ln)
		* label now says "std", in line with the other standardised (_s) variables
		label var leg_size_ln_s "82. # seats in legislature (ln), std (IAEP)"
		sum leg_size_ln_s
		codebook ccode if leg_size_ln_s~=.
		codebook year if leg_size_ln_s~=.
		drop leg_size_ln
		
		***********************
		** Political factors ** 
		***********************		
				
		* 3	Closeness/competitiveness
		*   inspect the raw measure, then replace it with its z-score

			summarize competitive
			codebook competitive
			codebook ccode if competitive != .
			codebook year if competitive != .
			browse country year ccode vt vt_lag competitive
			egen competitive_s = std(competitive)
			label variable competitive_s "3 Competitiveness (dif.in vote share of top 2 parties), std (FM) "
			summarize competitive_s
			codebook ccode if competitive_s != .
			codebook year if competitive_s != .
			drop competitive

		*10	Polity 2
		*   standardised, then lagged one row within ccode; only the lag is kept

			sort ccode year
			summarize polity2
			codebook polity2
			browse country year ccode vt vt_lag polity2
			egen polity2_s = std(polity2)
			by ccode: generate polity2_sl = polity2_s[_n-1]
			label variable polity2_sl "10 Polity2, std, lagged  (Polity IV) "
			summarize polity2_sl
			codebook ccode if polity2_sl != .
			codebook year if polity2_sl != .
			drop polity2_s polity2


		* 10	Quality of Democracy
		*   inspection only

			summarize qual_democ
			codebook qual_democ
			browse country year ccode vt vt_lag qual_democ
		
		* 38	# elections held
		*   inspect the count, log it, standardise the log, and keep only the
		*   standardised version

			summarize num_elections
			histogram num_elections, frequency
			codebook num_elections
			codebook ccode if num_elections != .
			codebook year if num_elections != .

			generate num_elections_ln = ln(num_elections)
			label variable num_elections_ln "38 # leg elections since 1945, ln (NELDA)"

			egen num_elections_ln_s = std(num_elections_ln)
			label variable num_elections_ln_s "38 # leg elections since 1945, ln, std (NELDA)"
			summarize num_elections_ln_s
			codebook ccode if num_elections_ln_s != .
			codebook year if num_elections_ln_s != .
			drop num_elections_ln num_elections
		
		* 70 Short term majority status of govt
		*   Inspect the raw measure, then replace it with its z-score over all
		*   rows in memory.

			sum maj_status
			codebook maj_status
			codebook ccode if maj_status~=.
			codebook  year if maj_status~=.
			browse country year ccode vt vt_lag maj_status
			egen maj_status_s=std(maj_status)
			* label now says "std", in line with the other standardised (_s) variables
			label var maj_status_s "70 Short-term majority status, std (FM)"
			sum maj_status_s
			codebook ccode if maj_status_s~=.
			codebook year if maj_status_s~=.
			drop maj_status
		
		* 127   harassment
		*   the four NELDA dummies below are only inspected, not transformed

			summarize harassment
			codebook ccode if harassment != .
			codebook year if harassment != .

		*128	Opposition ban

			summarize ban
			codebook ccode if ban != .
			codebook year if ban != .

		*129	Boycott

			summarize boycott
			codebook ccode if boycott != .
			codebook year if boycott != .

		*130	Violence
			summarize violence
			codebook ccode if violence != .
			codebook year if violence != .

		*65	Time since previous election
		*   inspect the raw gap in years, then replace it with its z-score
			summarize prev_el_time
			codebook prev_el_time
			codebook ccode if prev_el_time != .
			codebook year if prev_el_time != .
			browse country year ccode vt vt_lag prev_el_time
			egen prev_el_time_s = std(prev_el_time)
			label variable prev_el_time_s "65 Years since previous election, std (generated)"
			summarize prev_el_time_s
			codebook ccode if prev_el_time_s != .
			codebook year if prev_el_time_s != .
			drop prev_el_time
		
		* 59	Cumulative electorate size
		*   inspect the raw measure, then replace it with its z-score

			summarize ln_vap
			codebook ln_vap
			codebook ccode if ln_vap != .
			codebook year if ln_vap != .
			browse country year ccode vt vt_lag ln_vap
			egen ln_vap_s = std(ln_vap)
			label variable ln_vap_s "59	Cumulative electorate size, ln, std "
			summarize ln_vap_s
			codebook ccode if ln_vap_s != .
			codebook year if ln_vap_s != .
			drop ln_vap

		*23	Linear time trend (1945=1)
		*   inspect the raw trend, then replace it with its z-score

			summarize time_trend
			codebook ccode if time_trend != .
			codebook year if time_trend != .
			codebook time_trend
			browse country year ccode vt vt_lag time_trend
			egen time_trend_s = std(time_trend)
			label variable time_trend_s "23 Linear time trend (1945=1), std "
			summarize time_trend_s
			codebook ccode if time_trend_s != .
			codebook year if time_trend_s != .
			drop time_trend

			*47	Cumulative executive responsiveness
			*   inspection only

			summarize exec_response
			codebook ccode if exec_response != .
			codebook year if exec_response != .
			browse country year ccode vt vt_lag  exec_response

		** 24 Population 30-69/VAP (steiner)

			summarize share30to69ofVAP
			browse country year ccode vt vt_lag share30to69ofVAP

		**  114 Union density (Golden & Walrestein 2006) union_density
			summarize union_density

		** 18 Legal voting age (Steiner)
			summarize votingage
		
		** dropping data that are too few and break models
		drop cpi ggexp dem_exp literacy disprop_leg ///
			elf ncount365i new_democ ln_vap_s

							*********************
							** SAVING DATASETS **
							*********************

		* The variable ordering below is disabled (kept inside a block comment)
		/*order ccode country  electionid year mmdd vt vt_lag vapvt vapvt_lag ln_vap vote  ///
		 comp competitive enpp_l pr concurrent popl unicameral ///
		 polity2 qual_democ swiss disprop_leg gdp_pc_growth dist_mag suffreg ///
		 harassment violence ban boycott plurality enpp_sq num_elections ///
		 election_2 election_3 election_4 election_3_4 election_5_6 election_7_8 ///
		 majoritarian mixed prev_el_time africa asia cpi dem_exp econ_global ///
		 eeurope elf exec_response federalism fract_ethnic fract_lang fract_rel ///
		 ggexp gini gni_pc_constant gni_pc_growth gni_ln hdi_index inflation islamic_majority ///
		 latin_america leg_size_ln life_exp literacy maj_status ncount365i ///
		 new_democ norway nz oceania oecd party_frac pop_density population ///
		 post_ww2 radios_pc samerica subnat_exp subnat_rev sufferage sweden ///
		 time_trend unemployment union_density unionisation urban_pop_pct ///
		 usa west y1919_1994 */

		* write the standardised / lagged analysis file
		label data "Standardizing and lagged EBA turnout data (Ferran & Rich)"
		save turnout_eba_stdlag.dta, replace

	 
				
	*****************************
	*** ones we do not include **
	*****************************
	
 	** 57. Effective electoral threshold (ln) ** CANNOT FIND rep data for Golder et al 2017 piece **
	** 83. Majority (multimember)  ** CANNOT FIND **
	 	
	** 54. One party majority government
		** we exclude this variable as it is expost determined	
	** 61 recent threat to democracy
	** 64 relevant elected president
	** 69 electoral volatility
	** 71. Voting holiday
	** 72 voting share of left parties
	** 73
	** 85 frequent changes of executive
	** 86 party group linkages
	** 87 socially owned enterprises
	** 88 	private system
	** 89 mixed system
	** 91-97 also excluded
	** 98 postmaterialist party
	** 99 cohabitation
	** 100 legislators in national elections
	** 103 party composition
	** 104 PR seats (%)
	** 108 Party replacement 
	** 109-113 do not include
	** 116 pre-electoral coalitions
	** 117 Dispersion
	** 121 direct election
	** 122 short-term mean margin of victory
	** 131 party linkages 
	** 119 Personal vote
			 
	
